/*
This code creates several combined ASMB-NMDB datasets, with information
on borrowers and loans from both the ASMB and NMDB.
*/



/**********************************************************************************************
Step 1: Import and save a slightly cleaned-up version of the ASMB
**********************************************************************************************/

* Import the raw ASMB data (SAS format).
* The path is quoted, like every other path in this file, so that a $ASMB
* directory containing spaces does not break the command.
import sas using "$ASMB/asmb_beta.sas7bdat", clear

* Merge to the crosswalk to get the NMDB loannb.
* The SAS file names the identifier ASMBID; the merge below uses the
* lower-case name asmbid.
rename ASMBID asmbid
* NOTE(review): no keep() option is given, so any crosswalk rows without a
* matching ASMB response (using-only rows) are retained here and survive
* the drop below whenever their loannb is non-missing -- confirm that the
* crosswalk contains only ASMB respondents.
merge 1:1 asmbid using "$CROSSWALK/asmb_nmdb_v130_crosswalk.dta", nogen


* Note that about 1.6% of ASMB responses cannot be matched to NMDB loans.
* According to Saty, this happens when survey respondents were not actually
* eligible for the ASMB.  Drop these loans.
drop if missing(loannb)

* Save the cleaned ASMB file; Steps 2-4 each start from this dataset.
save "$DATA_OUT/ASMB.dta", replace



/**********************************************************************************************
Step 2: Import and Merge Borrower Info
**********************************************************************************************/

** Borrower-level dataset: one observation per (loan, borrower) pair
use "$DATA_OUT/ASMB.dta", clear

* Attach the NMDB borrower records to each ASMB loan.
* The merge is 1:m because a single loan can have several borrowers;
* only loans found in both files are retained.
merge 1:m loannb using "$NMDB/nmdb_20210113_v130_borrowers.dta", keep(match) nogenerate


* Write out the individual-level ASMB-NMDB borrower file
save "$DATA_OUT/NMDB_ASMB_borrowers.dta", replace


** Create another dataset with data on all borrowers on a loan combined, for use in Figure 1
use  "$DATA_OUT/NMDB_ASMB_borrowers.dta", clear

* Sort so that the first borrower on a mortgage comes first
gsort loannb personnb

* Declare useful variables
* `one' is a counter that is summed in the collapse to count borrowers per loan
gen one = 1
* NOTE(review): the 1997 offset and the coding of birth_month are not
* documented in this file -- confirm against the NMDB codebook.
gen birth_year = 1997 + (birth_month / 12)
gen age = round(survey_year - birth_year)

* For credit score, race/ethnicity, and age, take the first value per loan
* Because the data was sorted above, this will take the values for the
* first borrower on a mortgage.
* first_time takes the minimum across borrowers on the loan, and
* num_borrowers is the number of borrower records per loan.
collapse (min) first_time (first) race ethnic age score_orig (sum) num_borrowers = one, by(asmbid)

* Create one race/ethnicity variable
gen race_ethnic     = "White" if race==1
replace race_ethnic = "Black" if race==2
replace race_ethnic = "Hispanic" if ethnic==1
* Catch-all category.  Besides missing race (the original rule), also
* classify as "Other" any borrower not assigned above -- i.e. a non-Hispanic
* borrower whose race code is neither 1 nor 2 -- so that race_ethnic is
* never left as an empty string.
replace race_ethnic = "Other" if mi(race) | mi(race_ethnic)

* Save the dataset on all borrowers on a loan combined
save "$DATA_OUT/NMDB_ASMB_borrowers_combined.dta", replace

/**********************************************************************************************
Step 3: Merge in loan-level data from NMDB with quarterly info on performance etc.
**********************************************************************************************/

* Start from the cleaned ASMB file and attach the NMDB loan-level records,
* keeping only loans that appear in both files
use "$DATA_OUT/ASMB.dta", clear
merge 1:1 loannb using "$NMDB/nmdb_20210113_v130_loans.dta", keep(match) nogenerate

** Build the 60D proxy for liquidity shocks from the quarterly information
do "$DOFILES/9999_create_60D_proxy_for_liquidity_shocks.do"

** Build the MtM home equity proxies from the quarterly information.
** The global is set to 1 beforehand because ASMB has condo information,
** so the fine BK proxy can be created.
global BKfine_bool = 1
do "$DOFILES/9999_create_MtM_equity_proxies.do"

* Write out the loan-level ASMB-NMDB file
save "$DATA_OUT/NMDB_ASMB_loans", replace


/**********************************************************************************************
Step 4: Import and Merge Monthly Loan Data
**********************************************************************************************/


** Attach both monthly NMDB files to the cleaned ASMB file,
** keeping only loans present in every file
use "$DATA_OUT/ASMB.dta", clear

merge 1:1 loannb using "$NMDB/nmdb_20210113_v130_month1.dta", keep(match) nogenerate
merge 1:1 loannb using "$NMDB/nmdb_20210113_v130_month2.dta", keep(match) nogenerate

** Define defaulters etc. from the monthly performance information
do "$DOFILES/9999_define_defaulters.do"

* Indicator for a survey weight of at least 10000; according to
* Tim Critchfield, these people are in the "current" sample.
* NOTE(review): a missing surveywgt also evaluates as >= 10000 in Stata,
* so such rows would be flagged 1 -- confirm surveywgt is never missing.
generate heavywgt = (surveywgt >= 10000)

*** Finally, define our sample of defaulters:
***   (1) respondents who went 90+ days delinquent,
***   (2) who were current within 4 years of the survey, and
***   (3) in the delinquent subsample of the ASMB.
* Conditions (1) and (2) rely on defaulter and last_current_month_b4_default,
* which are not created in this file (presumably by the do-file run above);
* condition (3) is heavywgt == 0.
generate in_default_sample = (defaulter == 1) & !missing(last_current_month_b4_default) & (heavywgt == 0)
save "$DATA_OUT/NMDB_ASMB_monthly", replace



/**********************************************************************************************
Step 5: Finally create one dataset of ASMB respondents with all variables we want
**********************************************************************************************/

* Start from the loan-level file created in Step 3
use "$DATA_OUT/NMDB_ASMB_loans", clear

* Add the default/delinquency variables from the monthly file (Step 4).
* assert(match) makes the script stop if any respondent is missing from
* either file.
merge 1:1 asmbid using "$DATA_OUT/NMDB_ASMB_monthly", keepusing(heavywgt defaulter in_default_sample dpd_status last_current_month_b4_default last_current_month_b4_delinq current*) assert(match) nogen

* Add the combined borrower characteristics (Step 2).
* nogen is specified here as well: otherwise a constant _merge variable is
* saved into the final dataset and any later merge on it fails because
* _merge already exists.
merge 1:1 asmbid using "$DATA_OUT/NMDB_ASMB_borrowers_combined.dta", keepusing(age race_ethnic score_orig num_borrowers first_time) assert(match) nogen

* Foreclosure indicator: 1 when x59 equals 5 or 6, 0 otherwise
* (including when x59 is missing).
* NOTE(review): the meaning of x59 and of codes 5/6 is not documented in
* this file -- confirm against the ASMB codebook.
gen foreclosure = inlist(x59,5,6)

* Save the final respondent-level dataset
save "$DATA_OUT/NMDB_ASMB_respondents", replace
